#####
# Crossing Borders: Data Organizing
#####

library(here)
library(qs)

source(here("code", "functions.R"))

#####
# A. Loading component data
#####

###
# Source the component cleaning scripts from code/cleaning/, strictly in the
# order listed (later steps rely on objects created by earlier ones).
for (cleaning_script in c(
  # 1. prepare the aggregate population data needed for the analysis and for
  #    updating the time-invariant measures
  "muniPopCleaning.R",
  # 2. update the time-invariant measures to the 2019-vintage municipalities
  "timeConstantCleaning.R",
  # 3. pre-process the election data (1991-2019). Pre-aggregated to the
  #    2019-01-01 municipalities by the BfS!
  "elecCleaning.R",
  # 4. pre-process the PETRA data (1990-2009)
  "petraCleaning.R",
  # 5. pre-process the STATPOP data (2010-2014)
  "statpopCleaning.R",
  # 6. pre-process the CBW register data (1996--2016)
  "cbwCleaning.R"
)) {
  source(here("code", "cleaning", cleaning_script))
}
# Drop the loop variable so the workspace only holds what the scripts created.
rm(cleaning_script)

#####
# B. Merging components
#####

# All merges below are full outer joins (all = TRUE): a municipality-year that
# appears in only one source is kept, with NA in the other source's columns.

# 1. Combine municipal population and election data. New data is CB
cb <- merge(elecFR, muniPop, by = c("year", "bfs19"), all = TRUE)

# 2. Stack PETRA and STATPOP
ps <- rbindlist(list(petra, statpop))

# 3. Merge ps and cb. ps is left in the workspace afterwards; add rm(ps) here
#    if it is no longer needed.
cb <- merge(cb, ps, by = c("year", "bfs19"), all = TRUE)

# 4. Merge cb with the CBW register
cb <- merge(cb, cbwYear, by = c("year", "bfs19"), all = TRUE)

# 5. Add the time-invariant data into cb. Joined on municipality only, so the
#    tc columns repeat across every year of a municipality. A municipality that
#    exists only in tc yields a row with NA year.
cb <- merge(cb, tc, by = "bfs19", all = TRUE)

#####
# C. Creating variables
#####

# 1. Indicators for liberalization periods:
#    transition = 1 for years 2000-2003 (inclusive), free = 1 for years after 2003
cb[, transition := as.integer(year %between% c(2000, 2003))]
cb[, free := as.integer(year > 2003)]

# 2. Easier to use treatment indicators for did: each border indicator
#    (border15, border0, border30, border5) interacted with both period dummies.
for (km in c(15, 0, 30, 5)) {
  border <- cb[[paste0("border", km)]]
  set(cb, j = paste0("transBorder", km), value = border * cb$transition)
  set(cb, j = paste0("freeBorder", km), value = border * cb$free)
}
rm(km, border)

# 3. Measures of "d-aytime f-oreign" (A-ggregate or R-egister): foreign
#    residents plus cross-border workers (cbw). `+` propagates NA, so a measure
#    is NA wherever either component is missing.
# NOTE(review): `popJanForiegn` is spelled this way throughout this script;
# keep the spelling unless the column is renamed where it is created.
set(cb, j = "dfA", value = cb$popJanForiegn + cb$cbw)
set(cb, j = "dfR", value = cb$all_imm + cb$cbw)

# Day Foreign German, French, Italian, and Austrian
set(cb, j = "dfDE", value = cb$germany + cb$cbwDE)
set(cb, j = "dfFR", value = cb$france + cb$cbwFR)
set(cb, j = "dfIT", value = cb$italy + cb$cbwIT)
set(cb, j = "dfOE", value = cb$austria + cb$cbwOE)

# 4. Versions of the daytime-foreign, foreign-resident (imm*) and CBW variables
#    that are normalized by the Swiss population and expressed in percent:
#      <stem>98 : divided by the municipality's fixed 1998 Swiss population
#      <stem>C  : divided by the "C"urrent year's Swiss population

# Swiss population of each row's municipality in 1998, aligned with the rows of
# cb. match() takes the first 1998 row of a municipality and returns NA when
# there is none, so a missing, duplicated or NA-year row cannot change the
# length of the denominator.
rows98 <- which(cb$year == 1998)
swiss98 <- cb$popJanSwiss[rows98][match(cb$bfs19, cb$bfs19[rows98])]

# Output-column stem -> source column holding the raw count.
norm_src <- c(
  dfA = "dfA", dfR = "dfR",
  immA = "popJanForiegn", immR = "all_imm", cbw = "cbw",
  dfDE = "dfDE", dfFR = "dfFR", dfIT = "dfIT", dfOE = "dfOE",
  immDE = "germany", immFR = "france", immIT = "italy", immOE = "austria",
  cbwDE = "cbwDE", cbwFR = "cbwFR", cbwIT = "cbwIT", cbwOE = "cbwOE",
  cbwOther = "cbwOther"
)

# Adds paste0(stem, suffix) = source / denom * 100 to cb (by reference) for
# every stem, in the order given.
add_pct_cols <- function(stems, suffix, denom) {
  for (stem in stems) {
    set(cb, j = paste0(stem, suffix),
        value = cb[[norm_src[[stem]]]] / denom * 100)
  }
  invisible(NULL)
}

# The two headline measures are added first, then all remaining measures; the
# call order fixes the position of the new columns in cb.
headline <- c("dfA", "dfR")
add_pct_cols(headline, "98", swiss98)
add_pct_cols(headline, "C", cb$popJanSwiss)
add_pct_cols(setdiff(names(norm_src), headline), "98", swiss98)
add_pct_cols(setdiff(names(norm_src), headline), "C", cb$popJanSwiss)

# Remove the temporaries so only cb is left changed.
rm(rows98, swiss98, norm_src, headline, add_pct_cols)

# 5. For weighting, just assume that population in December 2018 is the same as
#    January 2019 (this holds for every other year from the aggregate data)
setkey(cb, bfs19, year)

# December 2018 values, relabelled as year 2019 so they can be joined onto the
# 2019 rows. `year + 1L` keeps the type of the year column (integer or double).
dec2018 <- cb[year == 2018,
              .(bfs19, year = year + 1L, popDecTotal, popDecForeign, popDecSwiss)]

# Update join on municipality and year: each 2019 row receives the values of its
# own municipality, independent of row order; 2019 rows whose municipality has
# no 2018 row are left untouched.
cb[dec2018,
   `:=`(popJanTotal = i.popDecTotal,
        popJanForiegn = i.popDecForeign,
        popJanSwiss = i.popDecSwiss),
   on = c("bfs19", "year")]
rm(dec2018)

# 6. Indicators for event-study models: one column border15_<year> per year
#    1987-2019, equal to border15 in that year and 0 in every other year.
event_years <- 1987:2019
event_cols <- paste0("border15_", event_years)
cb[, (event_cols) := lapply(event_years, function(yr) {
  as.integer(year == yr) * border15
})]

# 7. Write the finished cb table to data/cb.qs (qs format) so later scripts can
#    load it quickly instead of rebuilding it.
qsave(x = cb, file = here("data", "cb.qs"))


# VOX, SHP and Ticino Parliament Cleaning ---------------------------------
# The remaining cleaning scripts, sourced from code/cleaning/ in this order.
for (cleaning_script in c(
  "voxCleaning.R",   # VOX
  "shpCleaning.R",   # SHP
  "ticino_clean.R",  # Ticino Parliament
  "ticino_merge.R"   # Ticino Parliament (runs after ticino_clean.R)
)) {
  source(here("code", "cleaning", cleaning_script))
}
rm(cleaning_script)